{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "A99Pp0T7A9BM"
   },
   "outputs": [],
   "source": [
    "# %pip installs into the running kernel's environment; the quotes keep the\n",
    "# shell from treating the [extras] brackets as a glob pattern.\n",
    "%pip install \"adalflow[openai,groq,faiss-cpu]\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): presumably pins httpx/anyio to versions compatible with the\n",
    "# client libraries installed by the adalflow extras - confirm it is still needed.\n",
    "# %pip (rather than !pip) targets the environment of the running kernel.\n",
    "%pip uninstall -y httpx anyio\n",
    "%pip install \"anyio>=3.1.0,<4.0\"\n",
    "%pip install \"httpx==0.24.1\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "y2SVUBNeBMy5"
   },
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "from getpass import getpass\n",
    "\n",
    "# Alternatively, keep OPENAI_API_KEY in your project's .env file and load it with:\n",
    "# from adalflow.utils import setup_env\n",
    "# setup_env()\n",
    "\n",
    "# Prompt for the key without echoing it; strip whitespace that often comes\n",
    "# along when the key is pasted.\n",
    "openai_api_key = getpass(\"Please enter your OpenAI API key: \").strip()\n",
    "\n",
    "if openai_api_key:\n",
    "    # Set environment variables\n",
    "    os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n",
    "    print(\"API keys have been set.\")\n",
    "elif os.environ.get(\"OPENAI_API_KEY\"):\n",
    "    # A blank entry must not overwrite a key that is already configured.\n",
    "    print(\"No key entered; keeping the existing OPENAI_API_KEY.\")\n",
    "else:\n",
    "    print(\"No key entered; OPENAI_API_KEY is not set.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "RWWG9WRt2r9L",
    "outputId": "faad52a8-47f5-48bc-e2c3-17a5aea21254"
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Splitting Documents in Batches: 100%|██████████| 1/1 [00:00<00:00, 788.85it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Document(id=6374a3e5-2ef9-40ba-a7b3-e18c2b466390, text='Example text. More example text. ', meta_data=None, vector=[], parent_doc_id=doc1, order=0, score=None)\n",
      "Document(id=b46045ba-3ebb-4e66-93d5-ece2d6ace3de, text='text. Even more text to ', meta_data=None, vector=[], parent_doc_id=doc1, order=1, score=None)\n",
      "Document(id=eba5555b-e6d6-4ca1-8452-af22295e68f8, text='to illustrate.', meta_data=None, vector=[], parent_doc_id=doc1, order=2, score=None)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "from adalflow.components.data_process.text_splitter import TextSplitter\n",
    "from adalflow.core.types import Document\n",
    "\n",
    "# Split on word boundaries: 5 words per chunk, with 1 word repeated between\n",
    "# consecutive chunks.\n",
    "text_splitter = TextSplitter(split_by=\"word\", chunk_size=5, chunk_overlap=1)\n",
    "\n",
    "# Example document\n",
    "doc = Document(\n",
    "    text=\"Example text. More example text. Even more text to illustrate.\", id=\"doc1\"\n",
    ")\n",
    "\n",
    "# Execute the splitting\n",
    "splitted_docs = text_splitter.call(documents=[doc])\n",
    "\n",
    "# Use a separate loop name so `doc` keeps pointing at the source document.\n",
    "for chunk in splitted_docs:\n",
    "    print(chunk)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "LioyB3eCAOs8",
    "outputId": "11cddc1c-608a-4027-830f-fe30a882a766"
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Splitting Documents in Batches: 100%|██████████| 1/1 [00:00<00:00, 489.02it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Document(id=b0c308f2-73d2-44cf-aaf2-63e8f87198e4, text='Example text. More example', meta_data=None, vector=[], parent_doc_id=doc1, order=0, score=None)\n",
      "Document(id=3a37adff-c8ac-4cff-8b5e-9c68e0de9772, text=' text. Even more text', meta_data=None, vector=[], parent_doc_id=doc1, order=1, score=None)\n",
      "Document(id=e1b56768-7918-4a94-8f08-a01161cb2dcf, text=' to illustrate.', meta_data=None, vector=[], parent_doc_id=doc1, order=2, score=None)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "from adalflow.components.data_process.text_splitter import TextSplitter\n",
    "from adalflow.core.types import Document\n",
    "\n",
    "# Split on tokenizer tokens instead of words: 5 tokens per chunk, no overlap.\n",
    "text_splitter = TextSplitter(split_by=\"token\", chunk_size=5, chunk_overlap=0)\n",
    "\n",
    "# Example document\n",
    "doc = Document(\n",
    "    text=\"Example text. More example text. Even more text to illustrate.\", id=\"doc1\"\n",
    ")\n",
    "\n",
    "# Execute the splitting\n",
    "splitted_docs = text_splitter.call(documents=[doc])\n",
    "\n",
    "# Use a separate loop name so `doc` keeps pointing at the source document.\n",
    "for chunk in splitted_docs:\n",
    "    print(chunk)"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
